This R Markdown file explores data about internet-connected devices via the Shodan API.
Required packages: dplyr, ggplot2, tidyr, httr2,
stringr, ggthemes, renv,
plotly, htmltools, maps,
knitr, kableExtra, jsonlite.
# Load required libraries
library(dplyr)
library(tidyr)
library(ggplot2)
library(httr2)
library(stringr)
library(ggthemes)
library(renv)
library(jsonlite)
library(plotly)
library(htmltools)
library(maps)
library(knitr)
library(kableExtra)
# Shodan API key and endpoint ----
# The key is read from the SHODAN_API_KEY environment variable so it never has
# to be written into this document.
api_key <- Sys.getenv("SHODAN_API_KEY")
if (!nzchar(api_key)) {
  # Sys.getenv() returns "" for an unset variable; fail here with a clear
  # message instead of sending a request without a key.
  stop(
    "SHODAN_API_KEY is not set; define it before running this document.",
    call. = FALSE
  )
}
api_url <- "https://api.shodan.io/shodan/host/search"

# Query parameters sent with the search request
params <- list(
  key = api_key,
  query = "has_screenshot:true encrypted", # ransomware related query
  # Requested cap on the number of results.
  # NOTE(review): confirm the search endpoint honours a `limit` parameter.
  limit = 200
)

# Build the request with httr2
request <- request(api_url) |>
  req_url_query(!!!params) |> # !!! splices the list into key = value pairs
  req_throttle(1) # throttle outgoing requests (API rate limit)

# Perform the request and keep the response for parsing
response <- req_perform(request)
# Parse the JSON body of the response.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
data <- resp_body_json(response, check_type = TRUE, simplifyVector = TRUE)

# Convert the `matches` element of the parsed body into a data frame
shodan_df <- data.frame(data$matches)

# Keep the columns of interest, then expand the nested `screenshot` and
# `location` columns into their own top-level columns.
shodan_df_ransomware <- shodan_df %>%
  select(ip_str, port, transport, product, os, location, screenshot) %>%
  unnest(`screenshot`) %>%
  unnest(`location`)

# Show the column names available after unnesting
colnames(shodan_df_ransomware)
# Select the columns of interest from the unnested data frame and give them
# presentation names in the same step. Renaming by name (new = old) cannot
# silently mislabel a column the way a positional colnames<- vector can.
shodan_df_ransomware <- shodan_df_ransomware %>%
  select(
    `IP Address` = ip_str,
    Port = port,
    Transport = transport,
    Service = product,
    `Operating System` = os,
    Country = country_name,
    `Country Code` = country_code,
    City = city,
    Longitude = longitude,
    Latitude = latitude,
    `Ransom Letter` = text
  ) %>%
  filter(!is.na(`Ransom Letter`)) %>% # Only keep rows with ransom letters
  # Sort by country. No group_by() is applied here: arrange() ignores groups,
  # and leaving the data frame ungrouped avoids leaking grouping downstream.
  arrange(Country)

# Write the data to a CSV file
write.csv(shodan_df_ransomware, "shodan_ransomware.csv", row.names = FALSE)
# Frequency table of infections per country, sorted in descending order
common_country_count <- table(shodan_df_ransomware$Country)
common_country_count <- sort(common_country_count, decreasing = TRUE)

# Number of infections per country/city/coordinate combination.
# ungroup() drops any grouping carried in from earlier steps, and passing
# every variable to count() returns an ungrouped result.
shodan_count <- shodan_df_ransomware %>%
  ungroup() %>%
  count(`Country Code`, `City`, `Longitude`, `Latitude`, Country)

# Country names, in the same order as the sorted counts
common_country_names <- names(common_country_count)

# Country (or countries, when tied) with the highest count.
# max() on an empty table warns and returns -Inf, so the empty case yields
# an empty character vector instead.
most_common_country <- if (length(common_country_count) > 0) {
  common_country_names[common_country_count == max(common_country_count)]
} else {
  character(0)
}
# Print a text summary of the infection counts: the leading country (or the
# countries tied for first place), the total number of infections, and the
# mean, median and standard deviation of infections per country.
if (length(most_common_country) == 0) {
  # Nothing to summarise; max()/mean() on empty counts would warn or give NaN.
  cat("The Shodan dataset contains no ransomware infections to summarise.",
      "\n")
} else {
  # Collapsing is a no-op for a single country and joins tied countries.
  country_text <- paste(most_common_country, collapse = ", ")

  # Only this phrase differs between the single-leader and the tied case.
  leader_phrase <- if (length(most_common_country) > 1) {
    "are the countries with the highest number of ransomware infections, with"
  } else {
    "is the country with the highest number of ransomware infections, with"
  }

  cat("According to the Shodan dataset,", country_text,
      leader_phrase,
      max(common_country_count), "incidents.",
      # Total number of ransomware infections
      "There are a total of", nrow(shodan_df_ransomware),
      "ransomware infections worldwide!", "\n",
      "\n",
      # Statistical analysis
      "The average number of ransomware infections per country is",
      round(mean(common_country_count), 2), "\n", # Average
      "The median number of ransomware infections per country is",
      median(common_country_count), "\n", # Median
      "The standard deviation of ransomware infections per country is",
      round(sd(common_country_count), 2), "\n") # Standard deviation
}
## According to the Shodan dataset, Brazil, United States are the countries with the highest number of ransomware infections, with 12 incidents. There are a total of 119 ransomware infections worldwide!
##
## The average number of ransomware infections per country is 2.98
## The median number of ransomware infections per country is 1
## The standard deviation of ransomware infections per country is 3.14
# Render the per-country infection counts as a styled HTML table inside a
# fixed-height scroll box.
common_country_count |>
  kable(
    format = "html",
    col.names = c("Country", "Number of Infections"),
    caption = "Ransomware Infections by Country"
  ) |>
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "responsive")
  ) |>
  scroll_box(height = "500px", width = "100%") # Add a scroll box
| Country | Number of Infections |
|---|---|
| Brazil | 12 |
| United States | 12 |
| Germany | 10 |
| Mexico | 9 |
| China | 7 |
| Russian Federation | 7 |
| Argentina | 5 |
| Spain | 5 |
| Turkey | 5 |
| Czechia | 4 |
| India | 4 |
| Colombia | 3 |
| Pakistan | 3 |
| Ukraine | 3 |
| Chile | 2 |
| Kazakhstan | 2 |
| Singapore | 2 |
| Viet Nam | 2 |
| Bahrain | 1 |
| Bangladesh | 1 |
| Belarus | 1 |
| Bulgaria | 1 |
| Canada | 1 |
| Denmark | 1 |
| Egypt | 1 |
| Finland | 1 |
| France | 1 |
| Ghana | 1 |
| Japan | 1 |
| Lithuania | 1 |
| Moldova, Republic of | 1 |
| Nigeria | 1 |
| Panama | 1 |
| Peru | 1 |
| Poland | 1 |
| Portugal | 1 |
| Serbia | 1 |
| South Africa | 1 |
| Taiwan | 1 |
| Uzbekistan | 1 |
# Layers shared by the static and the interactive infection maps. They are
# defined once so the two plots cannot drift apart.
infection_map_layers <- list(
  # World outline drawn underneath the points
  borders("world", colour = "gray50", fill = "gray50"),
  geom_point(),
  labs(title = "Ransomware Infections by Country and City",
       caption = "Source: Shodan API",
       x = "Longitude",
       y = "Latitude",
       color = "Country"), # matches the variable mapped to colour
  # theme_fivethirtyeight() supplies the base theme; the theme() call below
  # strips grid lines and axes for a map-like look.
  theme_fivethirtyeight(),
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.text.x = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks = element_blank(),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        legend.position = "none", # Hide the legend
        plot.title = element_text(hjust = 0.5)) # Center the title
)

# Static world map of ransomware infections: one point per row of
# shodan_count, coloured by country and sized by the infection count n.
ggplot(shodan_count,
       aes(x = Longitude, y = Latitude, color = `Country`, size = n)) +
  infection_map_layers

# Interactive version of the same map. The extra `text` aesthetic holds the
# HTML tooltip that ggplotly() displays on hover.
p <- ggplot(shodan_count, aes(x = Longitude, y = Latitude,
                              color = `Country`, size = n,
                              text = paste(" City:", `City`, "<br>", "Country:", `Country`,
                                           "<br>", "Number of Infections:", n))) +
  infection_map_layers

# Show only the custom tooltip text and left-align it in the hover label
ggplotly(p, tooltip = "text") %>%
  layout(hoverlabel = list(
    align = "left"
  ))